import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Notebook-style exploration: the bare expressions in this section only show a
# value in an interactive session; run as a script, their results are discarded.
os.getcwd()

# Load the raw crash records from the current working directory.
crashdata = pd.read_csv("Crashes.csv")
crashdata.describe(include="all")

# Allow up to 500 columns to be shown when a frame is displayed.
pd.set_option('display.max_columns', 500)
crashdata.head()

# Two-column sample of the raw frame.
demo = crashdata[["Case Number", "County"]]
demo.head(6)

# Columns carried forward into the rest of the analysis.
_kept_columns = [
    "Case Number",
    "County",
    "Crash Date",
    "Crash Day Of Week",
    "Crash Month",
    "Crash Year",
    "Crash Hour",
    "Crash Type",
    "Intersection",
    "Latitude",
    "Longitude",
    "Pedestrians Killed",
    "Environmental Condition",
    "Severity",
    "Hazmat Involved",
    "Light Condition",
    "Surface Condition",
    "Distracted Driving Involved",
    "Unsafe Speed Involved",
    "Bicyclist Involved",
    "Rural Or Urban",
]
crashdata2 = crashdata[_kept_columns]
crashdata2.head()

# Per-column count of missing values, before and after removing every row
# that has at least one missing value.
print(crashdata2.isnull().sum())
crashdata2 = crashdata2.dropna()
print(crashdata2.isnull().sum())
crashdata2
# Keep only the crashes whose "Crash Year" is greater than 2014.
# The explicit .copy() makes newData an independent frame: it is modified in
# place later in this script (drop/replace with inplace=True), and doing that
# on a boolean-indexed slice triggers pandas' SettingWithCopyWarning.
newData = crashdata2[crashdata2['Crash Year'] > 2014].copy()
newData

# Write the filtered rows to disk without the index column.
# NOTE(review): this is an absolute path to one user's Windows desktop, so the
# script fails on any other machine; consider a path relative to the project.
newData.to_csv(r'C:\Users\deepd\Desktop\newData.csv', index=False)
newData
# Compute the distinct values of each listed column, one after another.
# `uniqueSeverity` is reused for every column (despite its name), so after the
# loop it holds the distinct values of the last column listed.  In a script the
# intermediate results are not shown anywhere; this is an exploration step.
_inspected_columns = (
    "Severity",
    "Crash Month",
    "Crash Day Of Week",
    "Crash Date",
    "County",
    "Crash Year",
    "Crash Hour",
    "Crash Type",
    "Intersection",
    "Pedestrians Killed",
    "Hazmat Involved",
    "Light Condition",
    "Surface Condition",
    "Distracted Driving Involved",
    "Unsafe Speed Involved",
    "Bicyclist Involved",
    "Rural Or Urban",
    "Environmental Condition",
)
for _column in _inspected_columns:
    uniqueSeverity = pd.unique(newData[_column])
uniqueSeverity
# Remove, column by column, every row whose value is the literal 'Unknown'.
# `i` holds the index labels of the rows removed in the current pass.
_columns_with_unknown = (
    "Environmental Condition",
    "Crash Type",
    "Light Condition",
    "Surface Condition",
    "Rural Or Urban",
)
for _column in _columns_with_unknown:
    i = newData.loc[newData[_column] == 'Unknown'].index
    newData.drop(i, inplace=True)

# Remove the rows where "Pedestrians Killed" equals 1, then those where it
# equals 2.
for _killed in (1, 2):
    i = newData.loc[newData["Pedestrians Killed"] == _killed].index
    newData.drop(i, inplace=True)

# Recompute the distinct values of the filtered columns.  As before,
# `uniqueSeverity` ends up holding the result for the last column listed.
_rechecked_columns = (
    "Environmental Condition",
    "Crash Type",
    "Rural Or Urban",
    "Surface Condition",
    "Light Condition",
    "Pedestrians Killed",
)
for _column in _rechecked_columns:
    uniqueSeverity = pd.unique(newData[_column])
uniqueSeverity
newData
# Encode the text columns of newData as integer codes.
# Each mapping has the shape {column name: {original value: code}}, the nested
# form accepted by DataFrame.replace, so every replacement only touches its own
# column.  Several original values deliberately share one code (for example all
# "Dark - ..." light conditions map to 1).
HazmatInvolved_mapping = {"Hazmat Involved": {"No": 0, "Yes": 1}}
LightCondition_mapping = {
    'Light Condition': {
        "Dark - Street Lights On (Spot)": 1,
        "Dark - Street Lights Not Present": 1,
        "Dark - Street Lights On (Continuous)": 1,
        "Dark - Street Lights Off": 1,
        "Daylight": 2,
        "Dusk": 3,
        "Dawn": 4,
    }
}
RuralOrUrban_mapping = {"Rural Or Urban": {"Rural": 0, "Urban": 1}}
BicyclistInvolved_mapping = {"Bicyclist Involved": {"No": 0, "Yes": 1}}
UnsafeSpeedInvolved_mapping = {"Unsafe Speed Involved": {"No": 0, "Yes": 1}}
DistractedDrivingInvolved_mapping = {
    "Distracted Driving Involved": {"No": 0, "Yes": 1}
}
SurfaceCondition_mapping = {
    "Surface Condition": {
        "Dry": 1,
        "Wet": 2,
        "Snowy": 3,
        "Icy": 3,
        "Slush": 3,
        "Water (Standing or Moving)": 2,
        "Sand": 4,
        "Oil/Fuel": 5,
        "Oil": 5,
        "Sand / Mud / Dirt": 4,
        "Mud, Dirt, Gravel": 4,
        "Other": 6,
    }
}
# Severity becomes a binary target: 1 for fatal or suspected serious injury,
# 0 for everything else.
Severity_mapping = {
    "Severity": {
        "Fatal Injury": 1,
        "Suspected Serious Injury": 1,
        "Possible Injury": 0,
        "Suspected Minor Injury": 0,
        "No Apparent Injury": 0,
    }
}
Intersection_mapping = {
    "Intersection": {
        "Not Within Intersection Boundaries": 1,
        "Within Intersection Boundaries": 2,
        "At or near Railroad Crossing": 3,
    }
}
EnvironmentalCondition_mapping = {
    "Environmental Condition": {
        "Clear": 1,
        "Snow": 2,
        "Rain": 3,
        "Overcast": 4,
        "Sleet / Hail": 3,
        "Freezing Rain": 3,
        "Sleet / Hail / Freezing Rain": 3,
        "Fog / Smog / Smoke": 5,
        "Severe Crosswinds": 6,
        "Blowing Sand / Dirt": 6,
        "Blowing Snow": 2,
        "Other": 7,
    }
}
CrashType_mapping = {
    "Crash Type": {
        "Fixed Object": 1,
        "Same Direction - Side Swipe": 2,
        "Same Direction - Rear End": 3,
        "Right Angle": 4,
        "Backing": 5,
        "Animal": 6,
        "Non-fixed Object": 7,
        "Pedestrian": 8,
        "Opposite Direction - Side Swipe": 9,
        "Overturned": 10,
        "Blowing Snow": 11,
        "Left Turn / U Turn": 12,
        "Struck Parked Vehicle": 13,
        "Encroachment": 14,
        "Opposite Direction - Head On/Angular": 15,
        "Pedalcyclist": 16,
        "Railcar-Vehicle": 17,
        "Other": 18,
    }
}
County_mapping = {
    "County": {
        "Essex": 1,
        "Somerset": 2,
        "Cumberland": 3,
        "Bergen": 4,
        "Atlantic": 5,
        "Union": 6,
        "Ocean": 7,
        "Sussex": 8,
        "Hudson": 9,
        "Morris": 10,
        "Hunterdon": 11,
        "Passaic": 12,
        "Warren": 13,
        "Salem": 14,
        "Gloucester": 15,
        "Camden": 16,
        "Monmouth": 17,
        "Mercer": 18,
        "Burlington": 19,
        "Cape May": 20,
        "Middlesex": 21,
    }
}
# NOTE(review): the script previously used CrashDayOfWeek_mapping without ever
# defining it, which stopped execution with a NameError.  The labels below
# assume the column stores full English day names (in the same way the
# "Crash Month" mapping uses full month names) - confirm against the data and
# adjust the keys if the column uses another spelling.  Values that do not
# match any key are left unchanged by replace().
CrashDayOfWeek_mapping = {
    "Crash Day Of Week": {
        "Monday": 1,
        "Tuesday": 2,
        "Wednesday": 3,
        "Thursday": 4,
        "Friday": 5,
        "Saturday": 6,
        "Sunday": 7,
    }
}
CrashMonth_mapping = {
    "Crash Month": {
        "January": 1,
        "February": 2,
        "March": 3,
        "April": 4,
        "May": 5,
        "June": 6,
        "July": 7,
        "August": 8,
        "September": 9,
        "October": 10,
        "November": 11,
        "December": 12,
    }
}

# Apply the encodings in place, one column at a time.
for _mapping in (
    HazmatInvolved_mapping,
    LightCondition_mapping,
    RuralOrUrban_mapping,
    BicyclistInvolved_mapping,
    UnsafeSpeedInvolved_mapping,
    DistractedDrivingInvolved_mapping,
    SurfaceCondition_mapping,
    Severity_mapping,
    Intersection_mapping,
    EnvironmentalCondition_mapping,
    CrashType_mapping,
    County_mapping,
    CrashDayOfWeek_mapping,
    CrashMonth_mapping,
):
    newData.replace(_mapping, inplace=True)
newData

# Overwrite the earlier export with the encoded data (index column omitted).
# NOTE(review): absolute path to one user's Windows desktop - not portable.
newData.to_csv(r'C:\Users\deepd\Desktop\newData.csv', index=False)

# Spot-check two encoded columns; `uniqueSeverity` ends up holding the
# distinct "County" values.
uniqueSeverity = pd.unique(newData["Light Condition"])
uniqueSeverity
uniqueSeverity = pd.unique(newData["County"])
uniqueSeverity

# Feature/target columns used for modelling (identifiers, dates, hours and
# coordinates are left out).
newData2 = newData[
    [
        "County",
        "Crash Day Of Week",
        "Crash Month",
        "Crash Type",
        "Intersection",
        "Environmental Condition",
        "Severity",
        "Hazmat Involved",
        "Light Condition",
        "Surface Condition",
        "Distracted Driving Involved",
        "Unsafe Speed Involved",
        "Bicyclist Involved",
        "Rural Or Urban",
    ]
]
# info() prints its summary itself; printing the attribute `newData2.info`
# only showed the bound-method representation.
newData2.info()
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import accuracy_score
import graphviz
def _fit_and_score(model, x_train, y_train, x_val, y_val):
    """Fit *model* on the training split and score it on the validation split.

    Returns a ``(predictions, accuracy)`` pair, where ``predictions`` is what
    ``model.predict(x_val)`` returned and ``accuracy`` is the validation
    accuracy as a percentage rounded to two decimals.
    """
    model.fit(x_train, y_train)
    predictions = model.predict(x_val)
    # accuracy_score is documented as (y_true, y_pred).
    accuracy = round(accuracy_score(y_val, predictions) * 100, 2)
    return predictions, accuracy


# Split the encoded data: every column except "Severity" is a feature,
# "Severity" is the label.  80% of the rows train the models and 20% validate
# them; random_state fixes the split.
predictors = newData2.drop(['Severity'], axis=1)
target = newData2["Severity"]
x_train, x_val, y_train, y_val = train_test_split(
    predictors, target, test_size=0.20, random_state=2
)

# Train each classifier with its default settings and print its validation
# accuracy.  `y_pred` always holds the predictions of the most recent model.
decisiontree = DecisionTreeClassifier()
y_pred, acc_decisiontree = _fit_and_score(
    decisiontree, x_train, y_train, x_val, y_val
)
print(acc_decisiontree)

gaussian = GaussianNB()
y_pred, acc_gaussian = _fit_and_score(gaussian, x_train, y_train, x_val, y_val)
print(acc_gaussian)

log = LogisticRegression()
y_pred, acc_log = _fit_and_score(log, x_train, y_train, x_val, y_val)
print(acc_log)

rf = RandomForestClassifier()
y_pred, acc_rf = _fit_and_score(rf, x_train, y_train, x_val, y_val)
print(acc_rf)

# The k-nearest-neighbours model is now fitted itself.  Previously this step
# called `log.fit(...)`, which rebound `kn` to the logistic-regression model
# and reported that model's accuracy a second time.
kn = KNeighborsClassifier()
y_pred, acc_kn = _fit_and_score(kn, x_train, y_train, x_val, y_val)
print(acc_kn)
# Draw a heatmap of the pairwise correlations between the modelling columns.
_correlations = newData2.corr()
sns.heatmap(_correlations)
import os
import tarfile
from six.moves import urllib

# Notebook leftovers: convert the coordinate columns to plain lists and look
# at the resulting type.  Nothing is stored or printed when run as a script.
type(newData["Latitude"].tolist())
type(newData["Longitude"].tolist())
# from bokeh.io import output_file, output_notebook, show
# from bokeh.models import (
# GMapPlot, GMapOptions, ColumnDataSource, Circle, LogColorMapper, BasicTicker, ColorBar,
# Range1d, PanTool, WheelZoomTool, BoxSelectTool
# )
# from bokeh.models.mappers import ColorMapper, LinearColorMapper
# from bokeh.palettes import Viridis5
# map_options = GMapOptions(lat=40.710574, lng=-74.174822, map_type="roadmap", zoom=6)
# plot = GMapPlot(
# x_range=Range1d(), y_range=Range1d(), map_options=map_options
# )
# plot.title.text = "NJ Crash PLOT!"
# plot.api_key = "YOUR_GOOGLE_MAPS_API_KEY"  # placeholder: load the real key from configuration, never commit it
# source = ColumnDataSource(
# data=dict(
# lat=newData.Latitude.tolist(),
# lon=newData.Longitude.tolist()
# )
# )
# #color_mapper = CategoricalColorMapper(factors=['hi', 'lo'], palette=[RdBu3[2], RdBu3[0]])
# #color_mapper = LogColorMapper(palette="Viridis5", low=min_median_house_value, high=max_median_house_value)
# color_mapper = LinearColorMapper(palette=Viridis5)
# circle = Circle(x="lon", y="lat", fill_color={'transform': color_mapper}, fill_alpha=0.5, line_color=None)
# plot.add_glyph(source, circle)
# #color_bar = ColorBar(color_mapper=color_mapper, ticker=BasicTicker(),
# #label_standoff=12, border_line_color=None, location=(0,0))
# #plot.add_layout(color_bar, 'right')
# plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
# #output_file("gmap_plot.html")
# output_notebook()
# show(plot)